This script creates Seurat object from the aligned count data and performs QC based on Seurat functions. miQC is used to filter out low quality cells in a given library.
For more information or updates, please see Seurat.
To run the Rscript from the command line sequentially, use:
Rscript -e "rmarkdown::render('seurat_alignment_qc.Rmd', clean = TRUE,
params=list(scooter_path='/scooter',
results_dir='.',
data_path='/path/to/cellranger/output',
sample_name='pbmc_1k_v3',
min_genes=200,
normalize_method='log_norm',
num_pcs=30
))"
attach(params)
The following objects are masked _by_ .GlobalEnv:
data_path, results_dir, sample_name
The following objects are masked from params (pos = 6):
assay, data_path, grouping, log_file, min_genes, normalize_method, num_dim, num_neighbors, num_pcs, prefix, results_dir,
sample_name, scooter_path
#load_all(scooter_path)
load_all("/Users/chronia/CHOP/GitHub/scooter") # from https://github.com/igordot/scooter
# Load counts to a list of matrices
counts_mat <- load_sample_counts_matrix(path = data_path,
sample_name = sample_name)
loading counts matrix for sample: 7316-371
loading counts matrix dir: /Users/chronia/CHOP/GitHub/single-cell-analysis/data/10x-wf/7316-371/7316-371/outs/filtered_feature_bc_matrix
========== import cell ranger counts matrix ==========
# Create seurat object using gene expression data
seurat_obj <- create_seurat_obj(counts_matrix = counts_mat[["Gene Expression"]],
assay = "RNA",
log_file = NULL)
========== create seurat object ==========
input cells: 3564
input genes: 33940
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
# Save raw Seurat object
saveRDS(seurat_obj, file = paste0(results_dir, "/", "seurat_obj_raw.rds"))
# How many cells are in the seurat object?
cells_before_filter_num <- ncol(seurat_obj)
There are 3564 cells in the raw seurat object.
Plot distribution of the number of genes, UMI, and percent mitochondrial reads per cell.
# Plot number of genes
genes_unfilt <- plot_distribution(seurat_obj,
features = "nFeature_RNA",
grouping = grouping) +
geom_hline(yintercept = min_genes, color = "#D41159", size = 2) +
#geom_hline(yintercept = max(seurat_obj@meta.data$nFeature_RNA), color = "#D41159", size = 2) +
scale_fill_manual(values = alpha(c("#10559a"), .4)) +
#annotate(geom="text", x=0.7, y = max(seurat_obj@meta.data$nFeature_RNA), label= glue(""), color="#D41159") +
annotate(geom="text", x=0.7, y = min_genes - 200, label = glue("Min genes: {min_genes}"), color="#D41159")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
# plot number of UMIs
umi_unfilt <- plot_distribution(seurat_obj,
features = "nCount_RNA",
grouping = grouping) +
scale_fill_manual(values = alpha(c("#10559a"), .4))
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
# plot percent mitochondrial reads
mito_unfilt <- plot_distribution(seurat_obj,
features = "pct_mito",
grouping = grouping) +
#geom_hline(yintercept = max(seurat_obj@meta.data$pct_mito), color = "#D41159", size = 2) +
scale_fill_manual(values = alpha(c("#10559a"), .4))
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
#annotate(geom="text", x=0.7, y = max(seurat_obj@meta.data$pct_mito), label = glue(""), color="#D41159")
# get the legend for one of the plots to use as legend for the combined plot
legend_grid <- get_legend(mito_unfilt)
title <- ggdraw() +
draw_label("Unfiltered-data", fontface = 'bold', x = 0, hjust = 0) +
theme(
# add margin on the left of the drawing canvas,
# so title is aligned with left edge of first plot
plot.margin = margin(0, 0, 0, 7))
# Combine plots
plot_row <- plot_grid(genes_unfilt + theme(legend.position = "none"),
umi_unfilt + theme(legend.position = "none"),
mito_unfilt + theme(legend.position = "none"),
legend_grid,
ncol = 4)
plot_grid(title, plot_row, ncol = 1, rel_heights = c(0.1, 1))
We will use miQC to identify low-quality cells. miQC R package jointly models proportion of reads belonging to mitochondrial genes and number of unique genes detected. Cells with a high likelihood of being compromised (greater than 0.75) and cells that do not pass a minimum number of unique genes detected threshold of 200 will be removed from the counts matrix object.
sce <- as.SingleCellExperiment(seurat_obj)
In order to calculate the percent of reads in a cell that map to mitochondrial genes, we first need to establish which genes are mitochondrial. For genes listed as HGNC symbols, this is as simple as searching for genes starting with mt-. For other IDs, we recommend using a biomaRt query to map to chromosomal location and identify all mitochondrial genes.
# Set seed
set.seed(2023)
# Identify cells with mtDNA in the library
#mt_genes <- grepl("^mt-", rownames(sce))
mt_genes <- grepl(sce$pct_mito, rownames(sce))
Warning: argument 'pattern' has length > 1 and only the first element will be used
feature_ctrls <- list(mito = rownames(sce)[mt_genes])
feature_ctrls
$mito
[1] "MIR1302-2HG" "ENSG00000238009" "ENSG00000268903" "ENSG00000241860" "ENSG00000241599" "ENSG00000228463" "ENSG00000237094"
[8] "ENSG00000230021" "ENSG00000228327" "LINC01409" "ENSG00000230092" "LINC01128" "LINC00115" "ENSG00000288531"
[15] "ENSG00000272438" "ENSG00000230699" "ENSG00000241180" "ENSG00000272512" "ENSG00000231702" "ENSG00000217801" "ENSG00000285812"
[22] "LINC01342" "TTLL10" "ENSG00000260179" "LINC01786" "ENSG00000240731" "MRPL20-AS1" "MRPL20"
[29] "MRPL20-DT" "LINC01770" "ENSG00000284740" "TMEM240" "ENSG00000215014" "FNDC10" "ENSG00000286989"
[36] "ENSG00000272106" "ENSG00000272004" "ENSG00000269737" "ENSG00000268575" "ENSG00000227775" "ENSG00000231050" "ENSG00000233542"
[43] "ENSG00000271806" "FAAP20" "ENSG00000234396" "ENSG00000287356" "ENSG00000272161" "ENSG00000269896" "ENSG00000272420"
[50] "PEX10" "ENSG00000224387" "ENSG00000272449" "ENSG00000285945" "ENSG00000279839" "ENSG00000272235" "ENSG00000287828"
[57] "ENSG00000286518" "ENSG00000238260" "CEP104" "LINC01134" "LINC02780" "ENSG00000284703" "ENSG00000287586"
[64] "ENSG00000236948" "ENSG00000285629" "RNF207-AS1" "RNF207" "LINC00337" "ENSG00000231868" "ENSG00000229519"
[71] "LINC01672" "ENSG00000284744" "ENSG00000237365" "ENSG00000270171" "ENSG00000270035" "ENSG00000269978" "ENSG00000269925"
[78] "ENSG00000236266" "ENSG00000284747" "ENSG00000284716" "ENSG00000238290" "ENSG00000232848" "ENSG00000288816" "LINC01714"
[85] "ENSG00000226545" "LINC02606" "ENSG00000231181" "TMEM201" "ENSG00000233268" "ENSG00000280113" "ENSG00000285701"
[92] "ENSG00000228150" "ENSG00000284735" "ENSG00000284642" "ENSG00000271989" "ENSG00000203469" "ENSG00000272078" "EXOSC10"
[99] "ENSG00000226849" "EXOSC10-AS1" "ENSG00000284646" "ENSG00000284708" "ENSG00000285646" "KIAA2013" "LINC02766"
[106] "ENSG00000288927" "ENSG00000272482" "ENSG00000237445" "ENSG00000253085" "ENSG00000289380" "ENSG00000231606" "ENSG00000287756"
[113] "ENSG00000228140" "ENSG00000272510" "ENSG00000237301" "ENSG00000237938" "ENSG00000178715" "ENSG00000234607" "ENSG00000227959"
[120] "ENSG00000224621" "ENSG00000288398" "LINC01772" "ENSG00000261135" "ENSG00000285853" "ENSG00000282143" "ENSG00000280114"
[127] "ENSG00000282740" "ENSG00000238142" "ENSG00000272426" "ENSG00000226526" "LINC02783" "ARHGEF10L" "LINC02810"
[134] "ENSG00000284653" "ENSG00000280222" "ENSG00000225387" "ENSG00000272084" "ENSG00000286064" "ENSG00000270728" "RNU6-1099P"
[141] "MICOS10" "ENSG00000235185" "ENSG00000226396" "ENSG00000235434" "ENSG00000227066" "UBXN10" "ENSG00000284710"
[148] "LINC01141" "ENSG00000226487" "ENSG00000235432" "ENSG00000287192" "LINC02596" "ENSG00000283234" "ENSG00000285959"
[155] "LINC01635" "LINC00339" "ENSG00000285873" "ZBTB40" "ENSG00000240553" "ENSG00000289014" "LINC01355"
[162] "ENSG00000284726" "ENSG00000232482" "ENSG00000285802" "ENSG00000229239" "SRSF10" "ENSG00000225315" "ENSG00000288982"
[169] "ENSG00000284699" "ENSG00000233755" "ENSG00000284602" "ENSG00000284657" "ENSG00000272432" "ENSG00000261349" "TMEM50A"
[176] "ENSG00000225643" "ENSG00000233478" "ENSG00000255054" "ENSG00000228172" "ENSG00000236528" "SLC30A2" "ENSG00000284309"
[183] "FAM110D" "ENSG00000223583" "ENSG00000260063" "ENSG00000289554" "NR0B2" "ENSG00000224311" "ENSG00000243659"
[190] "ENSG00000241169" "ENSG00000237429" "LINC02574" "ENSG00000225886" "ENSG00000231344" "ENSG00000269971" "ENSG00000270031"
[197] "ENSG00000286433" "ENSG00000227050" "ENSG00000289576" "ENSG00000270605" "ENSG00000229820" "ENSG00000279443" "LINC01715"
[204] "ENSG00000233427" "TMEM200B" "ENSG00000225750" "LINC01756" "ENSG00000229607" "LINC01778" "SNRNP40"
[211] "ENSG00000229447" "ENSG00000229044" "LINC01226" "ENSG00000203620" "ENSG00000269967" "ENSG00000284702" "ENSG00000203325"
[218] "ENSG00000250135" "ENSG00000224066" "ENSG00000233775" "ENSG00000254553" "S100PBP" "ENSG00000287691" "ENSG00000278966"
[225] "ENSG00000239670" "ENSG00000236065" "ENSG00000278997" "ENSG00000279179" "ENSG00000284721" "ENSG00000270115" "ENSG00000225313"
[232] "ZSCAN20" "ENSG00000235907" "ENSG00000287703" "ENSG00000284773" "ENSG00000271741" "ENSG00000284640" "RN7SL503P"
[239] "KIAA0319L" "ENSG00000236274" "ENSG00000232335" "ENSG00000286899" "ENSG00000271914" "ENSG00000271554" "ENSG00000232862"
[246] "ENSG00000286379" "STK40" "LSM10" "ENSG00000284705" "ENSG00000284650" "ENSG00000223944" "LINC01137"
[253] "ENSG00000284748" "C1orf109" "EPHA10" "ENSG00000230955" "ENSG00000223589" "ENSG00000286552" "ENSG00000287987"
[260] "LINC01685" "ENSG00000284632" "ENSG00000273637" "ENSG00000228436" "ENSG00000274944" "ENSG00000287422" "ENSG00000226438"
[267] "ENSG00000225333" "ENSG00000261798" "ENSG00000284719" "LINC02811" "ENSG00000228477" "ENSG00000231296" "ENSG00000213172"
[274] "ENSG00000284677" "ENSG00000260920" "ENSG00000279667" "ENSG00000238287" "ENSG00000238186" "ENSG00000286838" "ENSG00000287743"
[281] "ENSG00000237899" "ENSG00000229528" "ENSG00000287400" "ENSG00000287587" "CCDC30" "ENSG00000236180" "ENSG00000285728"
[288] "ENSG00000234917" "C1orf50" "ENSG00000283580" "ENSG00000228192" "ENSG00000283973" "ENSG00000234694" "CDC20"
[295] "ENSG00000288772" "ENSG00000284989" "ENSG00000237950" "ENSG00000288573" "ENSG00000285649" "ATP6V0B" "ENSG00000230615"
[302] "ENSG00000227163" "RNF220" "ENSG00000225721" "ENSG00000226499" "LINC01144" "ENSG00000288208" "ENSG00000289407"
[309] "ENSG00000281112" "RPS15AP10" "ENSG00000234329" "ENSG00000230896" "ENSG00000226957" "ENSG00000233114" "ENSG00000227857"
[316] "LINC00853" "ENSG00000226252" "LINC01389" "LINC02794" "ENSG00000279096" "ENSG00000223720" "ENSG00000272491"
[323] "ENSG00000279214" "ENSG00000287661" "ENSG00000229846" "ENSG00000237478" "ENSG00000230828" "LINC01562" "ENSG00000233406"
[330] "ENSG00000236434" "ENSG00000266993" "ENSG00000272175" "ENSG00000223390" "ANAPC10P1" "ENSG00000272100" "ENSG00000287078"
[337] "ENSG00000272371" "ENSG00000230953" "ENSG00000235563" "ENSG00000236723" "ENSG00000226754" "ENSG00000234578" "ENSG00000228838"
[344] "LINC01771" "LINC02812" "ENSG00000280378" "ENSG00000225183" "ENSG00000256407" "ENSG00000225632" "ENSG00000287582"
[351] "ENSG00000230728" "ENSG00000287724" "ENSG00000242396" "ENSG00000233271" "ENSG00000234810" "LINC01753" "ENSG00000235612"
[358] "ENSG00000260971" "ENSG00000284686" "RPSAP20" "LINC01767" "ENSG00000227935" "RPS20P5" "ENSG00000235038"
[365] "ENSG00000286918" "ENSG00000185839" "ENSG00000283445" "LINC01135" "LINC02777" "LINC01358" "ENSG00000235215"
[372] "ENSG00000270457" "ENSG00000226883" "LINC01748" "ENSG00000231252" "ENSG00000287224" "RN7SL180P" "ENSG00000213703"
[379] "ENSG00000235545" "LINC01739" "LINC00466" "ENSG00000286429" "RN7SL130P" "LINC01359" "ENSG00000272506"
[386] "ENSG00000229294" "ENSG00000248458" "ENSG00000231080" "ENSG00000289394" "ENSG00000275678" "ENSG00000233589" "ENSG00000285407"
[393] "ENSG00000285473" "LINC01707" "LINC02791" "ENSG00000287453" "LRRC40" "ENSG00000271992" "LINC01788"
[400] "ENSG00000269933" "ENSG00000226324" "ENSG00000231985" "ENSG00000286863" "ENSG00000225087" "LINC02796" "LINC02797"
[407] "ENSG00000285778" "ENSG00000237324" "ENSG00000272864" "ENSG00000224493" "RNU6-503P" "ENSG00000230863" "ENSG00000225605"
[414] "ENSG00000230027" "ENSG00000272855" "ENSG00000288543" "ENSG00000230498" "ENSG00000228187" "ENSG00000289212" "ENSG00000287870"
[421] "RN7SL370P" "ENSG00000273338" "ENSG00000238015" "ENSG00000235400" "ENSG00000288822" "ENSG00000285409" "LINC01781"
[428] "ENSG00000285179" "ENSG00000234953" "ENSG00000236676" "ENSG00000233290" "LINC01362" "LINC01361" "LINC01725"
[435] "ENSG00000229486" "ENSG00000285201" "ENSG00000285851" "ENSG00000284882" "ENSG00000280099" "BCL10" "ENSG00000223653"
[442] "ENSG00000282057" "ENSG00000272691" "LINC02795" "ENSG00000284846" "ENSG00000267734" "ENSG00000267561" "ENSG00000225568"
[449] "LINC01140" "LINC01364" "ENSG00000279778" "ENSG00000286758" "ENSG00000286802" "ENSG00000284734" "ENSG00000233235"
[456] "ENSG00000286548" "ENSG00000271949" "ENSG00000231613" "ENSG00000272672" "ENSG00000287406" "ENSG00000287372" "LINC02787"
[463] "LINC02609" "LINC02788" "LINC01763" "ENSG00000272094" "ENSG00000229067" "ENSG00000289483" "ENSG00000273487"
[470] "ENSG00000223787" "ENSG00000225505" "ENSG00000229052" "ENSG00000289544" "ENSG00000287797" "ENSG00000237003" "ENSG00000229567"
[477] "ENSG00000260464" "ENSG00000236098" "ENSG00000288736" "ENSG00000223675" "ENSG00000231992" "ENSG00000226026" "LINC02607"
[484] "LINC02790" "LINC01787" "ENSG00000230718" "ENSG00000259946" "ENSG00000285922" "LINC01776" "ENSG00000232825"
[491] "ENSG00000227034" "LINC01708" "ENSG00000228084" "ENSG00000283761" "RNU6-750P" "ENSG00000288826" "ENSG00000241073"
[498] "ENSG00000285530" "ENSG00000228086" "LINC01349" "ENSG00000285525" "ENSG00000273204" "SLC30A7" "ENSG00000235795"
[505] "ENSG00000233184" "LINC01307" "LINC01709" "ENSG00000289192" "ENSG00000233359" "ENSG00000230864" "ENSG00000224613"
[512] "ENSG00000215869" "ENSG00000285981" "LINC01676" "ENSG00000232952" "LINC01677" "ENSG00000237480" "LINC01661"
[519] "ENSG00000289612" "LINC02785" "FAM102B" "ENSG00000285923" "ENSG00000237349" "ENSG00000251484" "ENSG00000225113"
[526] "ENSG00000260246" "LINC01768" "ENSG00000258634" "LINC01397" "ENSG00000273373" "ENSG00000259834" "ENSG00000288847"
[533] "ENSG00000288803" "ENSG00000261654" "ENSG00000232811" "ENSG00000273010" "ENSG00000273221" "ENSG00000229283" "ENSG00000260948"
[540] "ENSG00000243960" "LINC01160" "ENSG00000284830" "DDX20" "LINC01750" "LINC02884" "ENSG00000273483"
[547] "MOV10" "ENSG00000225075" "LINC01356" "LINC01357" "ENSG00000287807" "ENSG00000232499" "ENSG00000232450"
[554] "ENSG00000231128" "ENSG00000232895" "LINC01765" "ENSG00000237993" "LINC01762" "ENSG00000224950" "ENSG00000286276"
[561] "CD101" "ENSG00000236137" "LINC01525" "ENSG00000271427" "ENSG00000279513" "ENSG00000227712" "LINC01780"
[568] "LINC00622" "ENSG00000274642" "ENSG00000223804" "ENSG00000287979" "LINC00623" "ENSG00000234998" "ENSG00000227193"
[575] "LINC02798" "ENSG00000272583" "ENSG00000277702" "ENSG00000274927" "ENSG00000223495" "LINC02799" "ENSG00000232721"
[582] "ENSG00000223779" "ENSG00000230186" "ENSG00000289318" "LINC02802" "ENSG00000237343" "ENSG00000233586" "LINC01632"
[589] "ENSG00000224363" "ENSG00000283752" "LINC01145" "ENSG00000276216" "ENSG00000236140" "NBPF20" "ENSG00000286620"
[596] "CD160" "ITGA10" "ENSG00000289565" "ENSG00000287190" "LINC01719" "NBPF10" "ENSG00000276509"
[603] "ENSG00000287978" "ENSG00000289321" "ENSG00000227242" "ENSG00000237188" "LINC00624" "ENSG00000289419" "ENSG00000234190"
[610] "ENSG00000273059" "LINC02804" "LINC02805" "ENSG00000227733" "LINC01731" "LINC01138" "ENSG00000224481"
[617] "ENSG00000272824" "ENSG00000225871" "ENSG00000280649" "ENSG00000289642" "ENSG00000255148" "ENSG00000254539" "ENSG00000274265"
[624] "ENSG00000289614" "ENSG00000215861" "ENSG00000272755" "ENSG00000286185" "LINC00869" "ENSG00000233030" "H2BC20P"
[631] "ENSG00000264207" "H2AC20" "ENSG00000285184" "ENSG00000223945" "ENSG00000285554" "ENSG00000276110" "ENSG00000289041"
[638] "ENSG00000289457" "RN7SL600P" "ENSG00000253047" "ENSG00000288880" "RNU6-1309P" "ENSG00000261168" "ENSG00000289288"
[645] "ENSG00000273481" "ENSG00000232536" "ENSG00000223861" "ENSG00000250734" "ENSG00000227045" "ENSG00000269621" "ENSG00000249602"
[652] "ENSG00000269489" "ENSG00000285651" "S100A10" "ENSG00000229021" "S100A11" "ENSG00000236427" "S100A9"
[659] "S100A12" "S100A8" "S100A6" "S100A4" "S100A3" "S100A2" "ENSG00000285867"
[666] "S100A16" "S100A13" "ENSG00000271853" "S100A1" "ENSG00000272030" "ENSG00000243613" "ENSG00000231827"
[673] "ENSG00000223599" "ENSG00000236327" "ENSG00000284738" "ENSG00000285779" "ENSG00000282386" "ENSG00000273026" "ENSG00000285641"
[680] "ENSG00000272654" "NUP210L" "ENSG00000231416" "AQP10" "TDRD10" "ENSG00000286391" "ENSG00000287064"
[687] "ENSG00000270361" "ENSG00000271380" "SLC50A1" "ENSG00000273088" "ENSG00000236263" "ENSG00000232519" "ENSG00000246203"
[694] "ENSG00000287839" "ENSG00000227673" "ENSG00000234937" "ENSG00000285677" "ENSG00000252236" "ENSG00000237390" "ENSG00000289593"
[701] "ENSG00000260460" "ENSG00000272405" "ENSG00000229953" "ENSG00000285570" "ENSG00000223356" "ISG20L2" "ENSG00000229961"
[708] "ENSG00000236957" "LINC01704" "ENSG00000176320" "ENSG00000228560" "ENSG00000289484" "ENSG00000256029" "ENSG00000272668"
[715] "LINC01133" "KCNJ10" "ENSG00000225279" "ENSG00000227741" "ENSG00000258465" "ENSG00000228606" "ENSG00000234425"
[722] "ENSG00000228863" "ENSG00000198358" "ENSG00000270149" "ENSG00000289121" "ARHGAP30" "ENSG00000238934" "ENSG00000224985"
[729] "TOMM40L" "ENSG00000289106" "ENSG00000288670" "ENSG00000283696" "ENSG00000288093" "ENSG00000283360" "ENSG00000215840"
[736] "ENSG00000273112" "ENSG00000226889" "ENSG00000229808" "ENSG00000285636" "ENSG00000254706" "ENSG00000272574" "CCDC190"
[743] "ENSG00000230739" "ENSG00000269887" "ENSG00000271917" "ENSG00000236206" "ENSG00000215838" "ENSG00000236364" "RPS3AP10"
[750] "ENSG00000230898" "ENSG00000229588" "ENSG00000225325" "LINC01675" "LINC01363" "ENSG00000272033" "ENSG00000287218"
[757] "ENSG00000273160" "ADCY10" "ENSG00000250762" "ENSG00000227722" "ENSG00000228697" "ENSG00000285622" "LINC00626"
[764] "ENSG00000235575" "ENSG00000213062" "ENSG00000288139" "LINC01681" "LINC01142" "ENSG00000225545" "ENSG00000235303"
[771] "ENSG00000271811" "ENSG00000231424" "ENSG00000213060" "ENSG00000287336" "C1orf105" "ENSG00000224228" "ENSG00000231615"
[778] "ENSG00000238272" "ENSG00000285777" "ENSG00000289426" "KLHL20" "ENSG00000225591" "RN7SKP160" "Y-RNA.50"
[785] "ENSG00000237249" "ENSG00000289425" "RNU6-307P" "ENSG00000287697" "KIAA0040" "ENSG00000260990" "LINC02803"
[792] "ENSG00000236021" "ENSG00000227815" "ENSG00000286754" "LINC01645" "ENSG00000227579" "ENSG00000213058" "CLEC20A"
[799] "C1orf220" "ENSG00000273384" "FAM20B" "ENSG00000212338" "ENSG00000225711" "ENSG00000243062" "LINC02818"
[806] "ENSG00000272906" "RN7SL230P" "ENSG00000261831" "ENSG00000260360" "CEP350" "ENSG00000261817" "LINC02816"
[813] "ENSG00000243155" "ENSG00000251520" "LINC01732" "LINC01699" "ENSG00000288574" "ENSG00000272880" "ENSG00000287452"
[820] "ENSG00000224810" "LINC01344" "ENSG00000287808" "ENSG00000287929" "ENSG00000286372" "ENSG00000289581" "ENSG00000286655"
[827] "ENSG00000230470" "ENSG00000285847" "ENSG00000286378" "ENSG00000233583" "ENSG00000238061" "ENSG00000279838" "ENSG00000273004"
[834] "ENSG00000261729" "ENSG00000273198" "ENSG00000228238" "ENSG00000288562" "LINC01036" "ENSG00000285894" "ENSG00000238054"
[841] "LINC01035" "LINC01701" "ENSG00000287472" "ENSG00000230987" "ENSG00000225811" "LINC01351" "ENSG00000241505"
[848] "LINC01720" "ENSG00000285638" "LINC02770" "ENSG00000285280" "ENSG00000236069" "ZNF101P2" "RO60"
[855] "LINC01031" "ENSG00000286285" "ENSG00000227240" "ENSG00000285718" "FAM204BP" "ENSG00000224901" "ENSG00000287989"
[862] "ENSG00000261573" "LINC01222" "LINC01221" "ENSG00000286541" "LINC02789" "LINC00862" "ENSG00000230623"
[869] "ENSG00000282849" "ENSG00000229191" "ENSG00000224536" "RPS10P7" "ENSG00000236390" "ENSG00000223774" "ENSG00000232626"
[876] "ENSG00000226862" "ENSG00000235449" "ENSG00000234775" "ENSG00000224671" "NPM1P40" "LINC01353" "ENSG00000288925"
[883] "LINC01136" "ENSG00000227417" "ENSG00000286383" "ENSG00000286572" "ENSG00000219133" "ENSG00000226330" "ENSG00000288934"
[890] "ENSG00000240710" "ENSG00000287197" "ENSG00000229657" "SNRPGP10" "ENSG00000236942" "ENSG00000285521" "ENSG00000286619"
[897] "PM20D1" "ENSG00000285417" "ENSG00000226780" "ENSG00000236889" "ENSG00000261000" "ENSG00000234981" "ENSG00000279946"
[904] "ENSG00000224114" "IL10" "ENSG00000275392" "ENSG00000237074" "ENSG00000283044" "ENSG00000286198" "ENSG00000232537"
[911] "G0S2" "ENSG00000287343" "ENSG00000287354" "ENSG00000279333" "ENSG00000284299" "ENSG00000284376" "ENSG00000287033"
[918] "LINC00467" "SLC30A1" "ENSG00000288738" "LINC01693" "ENSG00000231057" "ENSG00000226868" "ENSG00000229983"
[925] "LINC02608" "ENSG00000230063" "ENSG00000234915" "ENSG00000287445" "LINC02771" "ENSG00000286213" "ENSG00000260805"
[932] "LINC02773" "ENSG00000236905" "ENSG00000236317" "ENSG00000225233" "ENSG00000228255" "LINC00538" "ENSG00000274895"
[939] "ENSG00000272167" "LINC02775" "ENSG00000228470" "ENSG00000287008" "ENSG00000229242" "LINC00210" "LINC01653"
[946] "ENSG00000223375" "LINC02869" "ENSG00000277007" "ENSG00000287676" "SLC30A10" "ENSG00000286231" "LINC01352"
[953] "LINC02817" "DUSP10" "LINC01655" "LINC02257" "LINC02474" "LINC01705" "ENSG00000236230"
[960] "ENSG00000276997" "ENSG00000267305" "ENSG00000272750" "ENSG00000288824" "ENSG00000287684" "ENSG00000287338" "ENSG00000226601"
[967] "ENSG00000288999" "GTF2IP20" "ENSG00000278467" "ENSG00000232628" "ENSG00000237101" "ENSG00000229742" "ENSG00000286174"
[974] "LINC02813" "ENSG00000286719" "LINC02765" "ENSG00000282418" "ENSG00000289602" "ENSG00000227496" "ENSG00000226349"
[981] "ENSG00000242861" "ENSG00000255835" "ENSG00000289341" "LINC01703" "ENSG00000275406" "ENSG00000287627" "ENSG00000288674"
[988] "ENSG00000287532" "ENSG00000228625" "ENSG00000236636" "ENSG00000286389" "LINC02809" "ENSG00000280157" "ENSG00000269934"
[995] "ENSG00000270110" "ENSG00000286773" "ENSG00000270104" "ENSG00000231563" "ENSG00000279306" "ENSG00000206878"
[ reached getOption("max.print") -- omitted 11705 entries ]
# miQC is designed to be run with the Bioconductor package scater, which has a built-in function addPerCellQC to calculate basic QC metrics like number of unique genes detected per cell and total number of reads. When we pass in our list of mitochondrial genes, it will also calculate percent mitochondrial reads.
sce <- addPerCellQC(sce, subsets = feature_ctrls)
head(colData(sce))
DataFrame with 6 rows and 11 columns
orig.ident nCount_RNA nFeature_RNA pct_mito ident sum detected subsets_mito_sum subsets_mito_detected
<factor> <numeric> <integer> <numeric> <factor> <numeric> <integer> <numeric> <integer>
7316-371:AAACCCAAGAGCGACT 7316-371 1875 1305 0.000 7316-371 1875 1305 130 95
7316-371:AAACCCAAGATCGCCC 7316-371 2960 1771 0.034 7316-371 2960 1771 254 175
7316-371:AAACCCACAAGCTCTA 7316-371 809 596 0.000 7316-371 809 596 61 51
7316-371:AAACCCACACAACATC 7316-371 4000 2212 0.000 7316-371 4000 2212 291 200
7316-371:AAACCCACACACAGCC 7316-371 1701 1202 0.000 7316-371 1701 1202 152 110
7316-371:AAACGAAAGAGAGAAC 7316-371 3225 1913 0.000 7316-371 3225 1913 237 160
subsets_mito_percent total
<numeric> <numeric>
7316-371:AAACCCAAGAGCGACT 6.93333 1875
7316-371:AAACCCAAGATCGCCC 8.58108 2960
7316-371:AAACCCACAAGCTCTA 7.54017 809
7316-371:AAACCCACACAACATC 7.27500 4000
7316-371:AAACCCACACACAGCC 8.93592 1701
7316-371:AAACGAAAGAGAGAAC 7.34884 3225
print(plotMetrics(sce))
model <- mixtureModel(sce)
parameters(model)
Comp.1 Comp.2
coef.(Intercept) 7.8486196586 4.735002213
coef.detected 0.0001542046 0.001813925
sigma 0.7421263579 1.399036934
head(posterior(model))
[,1] [,2]
[1,] 0.5027739 0.4972261
[2,] 0.7401113 0.2598887
[3,] 0.8518726 0.1481274
[4,] 0.7170284 0.2829716
[5,] 0.8084498 0.1915502
[6,] 0.6790415 0.3209585
# Plot
# The cells at the very top of the graph are almost certainly compromised, most likely to have been derived from the distribution with fewer unique genes and higher baseline mitochondrial expression.
print(plotModel(sce, model))
# Attention: To add code for cases when miQC won't work
# https://github.com/AlexsLemonade/scpca-nf/blob/main/bin/filter_sce.R
# Plot
print(plotFiltering(sce, model))
# Filter and remove the indicated cells by miQC
filtered_sce <- filterCells(sce, model)
Removing 837 out of 3564 cells.
# Convert sce back to seurat object
seurat_obj <- as.Seurat(filtered_sce,
counts = "counts",
data = "logcounts")
# Filter data by min_genes
seurat_obj <- subset(seurat_obj, subset = nFeature_RNA > min_genes)
# How many cells are in the seurat object after filtering?
cells_so_after_filter_num <- ncol(seurat_obj)
There are 2727 cells in the seurat object after filtering.
# For each gene, calculate % of cells expressing it and the mean
pct_cells <- apply(as.data.frame(seurat_obj@assays$RNA@counts), 1, function(x) (sum(x != 0))) / ncol(seurat_obj@assays$RNA@counts)
gene_means <- rowMeans(as.data.frame(seurat_obj@assays$RNA@counts))
p <- as.data.frame(cbind(pct_cells, gene_means)) %>%
rownames_to_column("gene")
# Set seed
set.seed(2023)
ggplot(data = p, aes(x = gene_means, y = pct_cells, label = gene)) +
geom_point(size = 3, alpha = 0.6) +
xlab("Mean count per gene") +
ylab("Percent cells expressing gene") +
stat_dens2d_labels(geom = "text_repel", keep.fraction = 0.001)
# number of expressed genes
num_genes <- apply(as.data.frame(seurat_obj@assays$RNA@counts), 2, function(x) (sum(x != 0)))
libsize <- colSums(as.data.frame(seurat_obj@assays$RNA@counts))
l <- as.data.frame(cbind(num_genes, libsize))
# Set seed
set.seed(2023)
ggplot(data = l, aes(x = log10(libsize), y = num_genes)) +
geom_point(size = 3, alpha = 0.6) +
xlab("Library Size (log10)") +
ylab("Number of Expressed Genes")
# plot number of genes
genes_sing <- plot_distribution(seurat_obj,
features = "nFeature_RNA",
grouping = grouping) +
geom_hline(yintercept = min_genes, color = "#D41159", size = 2) +
#geom_hline(yintercept = max(seurat_obj@meta.data$nFeature_RNA), color = "#D41159", size = 2) +
scale_fill_manual(values = alpha(c("#10559a"), .4)) +
#annotate(geom="text", x = 0.7, y = max(seurat_obj@meta.data$nFeature_RNA), label= glue(""), color="#D41159") +
annotate(geom="text", x = 0.7, y = min_genes - 200, label=glue("Min genes: {min_genes}"), color="#D41159")
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
# plot number of UMIs
umi_sing <- plot_distribution(seurat_obj,
features = "nCount_RNA",
grouping = grouping) +
scale_fill_manual(values = alpha(c("#10559a"), .4))
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
# plot percent mitochondrial reads
mito_sing <- plot_distribution(seurat_obj,
features = "pct_mito",
grouping = grouping) +
#geom_hline(yintercept = max(seurat_obj@meta.data$pct_mito), color = "#D41159", size = 2) +
scale_fill_manual(values = alpha(c("#10559a"), .4))
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
#annotate(geom="text", x = 0.7, y = max(seurat_obj@meta.data$pct_mito), label = glue(""), color="#D41159")
# get the legend for one of the plots to use as legend for the combined plot
legend_grid <- get_legend(mito_sing)
# Combine plots
plot_grid(genes_sing + theme(legend.position = "none"),
umi_sing + theme(legend.position = "none"),
mito_sing + theme(legend.position = "none"),
legend_grid,
ncol = 4)
# normalize seurat object using specified method, on specified assay
seurat_obj <- NormalizeData(seurat_obj, normalization.method = "LogNormalize",
nfeatures = 2000, assay = "RNA")
Warning: The following arguments are not used: nfeatures
seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "vst", nfeatures = 2000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
seurat_obj <- ScaleData(seurat_obj, features = VariableFeatures(seurat_obj))
Centering and scaling data matrix
We will run PCA to reduce dimensionality of the matrix (as defined in
the params).
# run PCA on specified assay using the number of PCs specified
seurat_obj <- run_dr(data = seurat_obj,
dr_method = "pca",
var_features = TRUE,
assay = assay,
num_pcs = num_pcs,
prefix = prefix)
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from PClognorm to PClognorm_
We will run UMAP based on the previous estimated PCs and for a
variety of combinations as for the number of dimensions and neighbors
(as defined in the params) to explore the clustering of
each sample in the library.
# Create a dataframe of all of the possible combinations of number of PCs
# to use for UMAP, and the number of neighbors
num_dim_vect <- c(num_dim)
num_neighbors_vect <- c(num_neighbors)
possibilities <- expand.grid(num_dim_vect, num_neighbors_vect)
# For each of these combinations, calculate UMAP
for(i in 1:nrow(possibilities)) {
num_dim <- possibilities[i, 1]
num_neighbors <- possibilities[i, 2]
seurat_obj <- run_dr(data = seurat_obj, dr_method = "umap", reduction = paste0("pca", prefix),
num_dim_use = num_dim, assay = "RNA", num_neighbors = num_neighbors,
prefix = glue("ndim{num_dim}nn{num_neighbors}{prefix}"))
}
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from UMAPndim20nn30lognorm to UMAPndim20nn30lognorm_Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from UMAPndim25nn30lognorm to UMAPndim25nn30lognorm_Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from UMAPndim20nn20lognorm to UMAPndim20nn20lognorm_Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from UMAPndim25nn20lognorm to UMAPndim25nn20lognorm_Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from UMAPndim20nn10lognorm to UMAPndim20nn10lognorm_Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Found more than one class "dist" in cache; using the first, from namespace 'BiocGenerics'
Also defined by ‘spam’
Warning: Keys should be one or more alphanumeric characters followed by an underscore, setting key from UMAPndim25nn10lognorm to UMAPndim25nn10lognorm_
# Generate metadata
reduction_names <- c(paste0("umap", "ndim", possibilities[,1], "nn", possibilities[,2], prefix), paste0("pca", prefix)) # Export the reductions to Seurat
metadata <- as_data_frame_seurat(seurat_obj, reduction = reduction_names,
metadata = TRUE)
# plot the first two PCs, and all of the different UMAPs
reduction_names <- c(paste0("UMAP", "ndim", possibilities[,1], "nn", possibilities[,2], prefix), paste0("PC", prefix))
plot_dr <- data.frame(X = paste0(reduction_names, "_1"),
Y = paste0(reduction_names, "_2"),
stringsAsFactors = FALSE)
for(i in 1:nrow(plot_dr)){
print(current_plot <- plot_scatter(metadata = metadata,
scratch_dir,
proj_name = sample_name,
log_file = log_file,
X = plot_dr[i,1],
Y = plot_dr[i,2],
color = grouping,
write = TRUE))
print(current_plot <- plot_scatter(metadata = metadata,
scratch_dir,
proj_name = sample_name,
log_file = log_file,
X = plot_dr[i,1],
Y = plot_dr[i,2],
color = "nFeature_RNA",
write = TRUE))
print(current_plot <- plot_scatter(metadata = metadata,
scratch_dir,
proj_name = sample_name,
log_file = log_file,
X = plot_dr[i,1],
Y = plot_dr[i,2],
color = "nCount_RNA",
write = TRUE))
print(current_plot <- plot_scatter(metadata = metadata,
scratch_dir,
proj_name = sample_name,
log_file = log_file,
X = plot_dr[i,1],
Y = plot_dr[i,2],
color = "pct_mito",
write = TRUE))
}
Let’s save the filtered seurat object to be used for further
downstream analysis (seurat_obj.rds).
# Save metadata
write_tsv(metadata, path = glue("{results_dir}/metadata_create_{sample_name}.tsv"))
Warning: The `path` argument of `write_tsv()` is deprecated as of readr 1.4.0.
Please use the `file` argument instead.
# Save Seurat object
saveRDS(seurat_obj, file = paste0(results_dir, "/", "seurat_obj.rds"))
R version 4.3.1 (2023-06-16)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/New_York
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] scooter_0.0.0.9004 testthat_3.2.0 irlba_2.3.5.1 Matrix_1.6-3 SeuratWrappers_0.2.0
[6] flexmix_2.3-19 lattice_0.22-5 SeuratObject_5.0.1 Seurat_4.4.0 scater_1.30.0
[11] scuttle_1.12.0 SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0 Biobase_2.62.0 GenomicRanges_1.54.1
[16] GenomeInfoDb_1.38.1 IRanges_2.36.0 S4Vectors_0.40.1 BiocGenerics_0.48.1 MatrixGenerics_1.14.0
[21] matrixStats_1.1.0 miQC_1.10.0 ggrepel_0.9.4 ggpmisc_0.5.5 ggpp_0.5.5
[26] stringr_1.5.1 GGally_2.1.2 ggplot2_3.4.4 forcats_1.0.0 devtools_2.4.5
[31] usethis_2.2.2 cowplot_1.1.1 future_1.33.0
loaded via a namespace (and not attached):
[1] fs_1.6.3 spatstat.sparse_3.0-3 bitops_1.0-7 httr_1.4.7 RColorBrewer_1.1-3
[6] ggsci_3.0.0 profvis_0.3.8 tools_4.3.1 sctransform_0.4.1 utf8_1.2.4
[11] R6_2.5.1 lazyeval_0.2.2 uwot_0.1.16 urlchecker_1.0.1 withr_2.5.2
[16] sp_2.1-1 prettyunits_1.2.0 gridExtra_2.3 progressr_0.14.0 textshaping_0.3.7
[21] quantreg_5.97 cli_3.6.1 spatstat.explore_3.2-5 labeling_0.4.3 sass_0.4.7
[26] spatstat.data_3.0-3 readr_2.1.4 ggridges_0.5.4 pbapply_1.7-2 systemfonts_1.0.5
[31] parallelly_1.36.0 sessioninfo_1.2.2 rstudioapi_0.15.0 FNN_1.1.3.2 generics_0.1.3
[36] vroom_1.6.4 ica_1.0-3 spatstat.random_3.2-1 dplyr_1.1.4 ggbeeswarm_0.7.2
[41] fansi_1.0.5 abind_1.4-5 lifecycle_1.0.4 yaml_2.3.7 SparseArray_1.2.2
[46] Rtsne_0.16 grid_4.3.1 promises_1.2.1 crayon_1.5.2 miniUI_0.1.1.1
[51] beachmat_2.18.0 pillar_1.9.0 knitr_1.45 future.apply_1.11.0 codetools_0.2-19
[56] leiden_0.4.3.1 glue_1.6.2 data.table_1.14.8 remotes_2.4.2.1 vctrs_0.6.4
[61] png_0.1-8 spam_2.10-0 gtable_0.3.4 cachem_1.0.8 xfun_0.41
[66] S4Arrays_1.2.0 mime_0.12 survival_3.5-7 ellipsis_0.3.2 fitdistrplus_1.1-11
[71] ROCR_1.0-11 nlme_3.1-163 bit64_4.0.5 RcppAnnoy_0.0.21 rprojroot_2.0.4
[76] bslib_0.5.1 vipor_0.4.5 KernSmooth_2.23-22 colorspace_2.1-0 nnet_7.3-19
[81] tidyselect_1.2.0 processx_3.8.2 bit_4.0.5 compiler_4.3.1 BiocNeighbors_1.20.0
[86] SparseM_1.81 desc_1.4.2 DelayedArray_0.28.0 plotly_4.10.3 scales_1.2.1
[91] lmtest_0.9-40 callr_3.7.3 digest_0.6.33 goftest_1.2-3 spatstat.utils_3.0-4
[96] rmarkdown_2.25 XVector_0.42.0 htmltools_0.5.7 pkgconfig_2.0.3 sparseMatrixStats_1.14.0
[101] fastmap_1.1.1 rlang_1.1.2 htmlwidgets_1.6.2 shiny_1.8.0 DelayedMatrixStats_1.24.0
[106] farver_2.1.1 jquerylib_0.1.4 zoo_1.8-12 jsonlite_1.8.7 BiocParallel_1.36.0
[111] BiocSingular_1.18.0 RCurl_1.98-1.13 magrittr_2.0.3 polynom_1.4-1 modeltools_0.2-23
[116] GenomeInfoDbData_1.2.11 dotCall64_1.1-0 patchwork_1.1.3 munsell_0.5.0 Rcpp_1.0.11
[121] viridis_0.6.4 reticulate_1.34.0 stringi_1.8.1 brio_1.1.3 zlibbioc_1.48.0
[126] MASS_7.3-60 plyr_1.8.9 pkgbuild_1.4.2 parallel_4.3.1 listenv_0.9.0
[131] deldir_1.0-9 splines_4.3.1 tensor_1.5 hms_1.1.3 ps_1.7.5
[136] igraph_1.5.1 spatstat.geom_3.2-7 reshape2_1.4.4 ScaledMatrix_1.10.0 pkgload_1.3.3
[141] evaluate_0.23 BiocManager_1.30.22 tzdb_0.4.0 httpuv_1.6.12 MatrixModels_0.5-3
[146] RANN_2.6.1 tidyr_1.3.0 purrr_1.0.2 polyclip_1.10-6 reshape_0.8.9
[151] scattermore_1.2 rsvd_1.0.5 xtable_1.8-4 later_1.3.1 viridisLite_0.4.2
[156] ragg_1.2.6 tibble_3.2.1 memoise_2.0.1 beeswarm_0.4.0 cluster_2.1.4
[161] globals_0.16.2